///////////////////////
///  LTI modelling  ///
///////////////////////

// This do file shows the steps needed to calcualted the regression table shown in the main part of the paper and the fitted LTIs for each loan
// It uses the loan-level PSD dataset that can be accessed via the BoE

clear
global DIR "C:\XXX" 
cd "${DIR}"
//
use "${DIR}\PSDfull.dta" // Once access is granted BoE staff can point to this master dta file 
set matsize 20000
set more off, perm
drop if year > 2017 // drop recent
drop if LTV > 150 // likely errors
drop if LTI > 50 // likely errors
drop if age_borrower>70 // likely errors or tiny mortgages

keep year month bank_id loan_val property_val advtype ratetype age_borrower gross_income ///
repayment region LTV LTI interest mortgage_term income_basis

// Change income measure to "per capita"
sum gross_income if income_basis==2
sum gross_income if income_basis==3
replace income_basis = 2 if income_basis==1 & gross_income>=55000 // replace unknowns
replace income_basis = 3 if income_basis==1 & gross_income<55000 // replace unknowns 
gen pc_income = gross_income
replace pc_income = gross_income/2 if income_basis== 2 // new per capita measure

// Rescale and dummies
sort year month
tostring year, g(y)
tostring month, g(m)
gen year_month = y + "_0" + m if month<10
replace year_month = y + "_" + m if month>9
egen ym = group(year_month)
gen quarter = 1 if month==1 | month==2 | month==3
replace quarter = 2 if month==4 | month==5 | month==6
replace quarter = 3 if month==7 | month==8 | month==9
replace quarter = 4 if month==10 | month==11 | month==12
tostring quarter, g(q)
gen year_quarter = y + "_" + q
egen yq = group(year_quarter)
drop if advtype <= 2 | advtype == 5 | advtype == 6 //drop business, council, unknown, other mortgages
egen mort  = group(advtype) 
drop if ratetype == 4 | ratetype == 6 //drop niche products capped, other
egen rate  = group(ratetype) 
drop if region ==.
gen pre_nrock = 0 
replace pre_nrock = 1 if year <2007 | (year == 2007 & month <=8) // redundant since co-linear
gen term_temp = 0
replace term_temp =1 if mortgage_term >18 & mortgage_term <=25  
replace term_temp =2 if mortgage_term >25 
egen term = group(term_temp)
egen reg   = group(region)
egen basis = group(income_basis) 
gen pc_inc = pc_income/10000   // scale by 10k for table
gen pc_inc_sq = pc_inc*pc_inc 
gen loan   = loan_val/1000000       // scale by 1m for table
gen hp     = property_val/1000000   // scale by 1m for table
egen inc_thresh = pctile(pc_inc), p(75)
egen age_thresh = pctile(age_borrower), p(50)

// PTI calculation
gen PMT = (loan_val * interest/100) / (1 - (1+interest/100)^(-(mortgage_term)))
replace PMT = (loan_val * interest/100) if repayment != 1 & repayment != 4
gen PTI = PMT/gross_income
drop if PTI ==.

// Remortgaging
gen remortg = 0
replace remortg = 1 if advtype==8

// LTV_buckut
gen LTV_buck = .
replace LTV_buck = 1 if LTV<=60
replace LTV_buck = 2 if LTV>60 & LTV <= 75
replace LTV_buck = 3 if LTV>75 & LTV <= 90
replace LTV_buck = 4 if LTV>90
drop if LTV_buck ==.
tostring LTV_buck, g(bucket)
egen ltv_bucket = group(bucket)
drop LTV_buck

// PC_inc bucket
gen Inc_buck = .
replace Inc_buck = 1 if pc_inc<=0.5
forv xx = 2/40 {
replace Inc_buck = `xx' if pc_inc>(`xx'-1)/2 & pc_inc <= `xx'/2
}
replace Inc_buck = 41 if pc_inc>20
drop if Inc_buck ==.

// Capturing trend changes
egen med_lti_all = pctile(LTI), p(50)
egen iqr_lti_all = iqr(LTI)
bysort ym: egen iqr_lti_ym = iqr(LTI)
bysort ym: egen med_lti_ym = pctile(LTI), p(50)
bysort ym: egen med_loan_ym = pctile(loan), p(50)
bysort ym: egen med_hp_ym =  pctile(hp), p(50)
bysort ym: egen med_inc_ym =  pctile(pc_inc), p(50)
bysort ym: egen p95_loan_ym = pctile(loan), p(95)
bysort ym: egen mean_lti_ym = mean(LTI)
bysort ym: egen mean_loan_ym = mean(loan)
bysort ym: egen mean_hp_ym = mean(hp)
bysort ym: egen mean_inc_ym = mean(pc_inc)

// Smaller dataset for manipulation
save "${DIR}\Smaller_dataset.dta", replace
scalar drop _all


//---------- TABLE 1 
** Column 1
reg LTI i.yq i.mort i.rate i.reg i.basis i.bank_id i.ltv_bucket hp interest PTI age_borrower
** Column 2
reg LTI i.yq i.mort i.rate i.reg i.basis i.bank_id i.ltv_bucket hp interest PTI age_borrower pc_inc pc_inc_sq  
** Column 3
reg LTI i.yq i.mort i.rate i.reg i.basis i.bank_id i.ltv_bucket hp interest PTI age_borrower pc_inc pc_inc_sq LTV mean_lti_ym mean_loan_ym

//----- Alternative specifications for Appendix
//log with pc_inc
*reg log_lti i.yq i.mort i.rate i.reg i.basis i.bank_id log_ltv log_hp pc_inc pc_inc_sq interest PTI age_borrower mean_lti_ym mean_loan_ym
//log without pc_inc
*reg log_lti i.yq i.mort i.rate i.reg i.basis i.bank_id log_ltv log_hp interest PTI age_borrower mean_lti_ym mean_loan_ym
//level without pc_inc
*reg LTI i.yq i.mort i.rate i.reg i.basis i.bank_id i.ltv_bucket hp interest PTI age_borrower LTV mean_lti_ym mean_loan_ym

predict lti_fitted, xb
predict lti_res, res

/// Coefficients for exclude
gen coef_age = _b[age_borrower]
gen coef_inc = _b[pc_inc]
gen coef_hp = _b[hp]
gen coef_loan = _b[loan]
gen coef_med_loan_ym = _b[med_loan_ym]
gen coef_med_lti_ym = _b[med_lti_ym]
gen coef_med_hp_ym = _b[med_hp_ym]
gen coef_mean_lti_ym = _b[mean_lti_ym]
gen coef_mean_loan_ym = _b[mean_loan_ym]
gen coef_mean_hp_ym = _b[mean_hp_ym]
gen coef_mean_inc_ym = _b[mean_inc_ym]

// FIT
gen lti_fit_xage  = lti_fitted - (age_borrower*coef_age)
gen lti_fit_xinc  = lti_fitted - (pc_inc*coef_inc)
gen lti_fit_xhp   = lti_fitted - (hp*coef_hp)

// Last data points
gen med_loan_ym_last = med_loan_ym[_N]
gen med_lti_ym_last = med_lti_ym[_N]
gen med_hp_ym_last = med_hp_ym[_N]
gen mean_hp_ym_last = mean_hp_ym[_N]
gen mean_inc_ym_last = mean_inc_ym[_N]
gen mean_loan_ym_last = mean_loan_ym[_N]
gen mean_lti_ym_last = mean_lti_ym[_N]

// Adjustment
gen lti_adj = lti_fitted - (mean_lti_ym*coef_mean_lti_ym) + (mean_lti_ym_last*coef_mean_lti_ym) - (mean_loan_ym*coef_mean_loan_ym) + (mean_loan_ym_last*coef_mean_loan_ym) //
gen lti_adj_xltv  = lti_fit_xltv  - (mean_lti_ym*coef_mean_lti_ym) + (mean_lti_ym_last*coef_mean_lti_ym) - (mean_loan_ym*coef_mean_loan_ym) + (mean_loan_ym_last*coef_mean_loan_ym) //
gen lti_adj_xpti  = lti_fit_xpti  - (mean_lti_ym*coef_mean_lti_ym) + (mean_lti_ym_last*coef_mean_lti_ym) - (mean_loan_ym*coef_mean_loan_ym) + (mean_loan_ym_last*coef_mean_loan_ym) //

egen lti_95pct = pctile(LTI), p(95)

egen lti_fit_90pct = pctile(lti_fitted), p(90)
egen lti_fit_95pct = pctile(lti_fitted), p(95)

drop if lti_adj==.
drop if lti_adj<0

egen lti_adj_75pct = pctile(lti_adj), p(75)
egen lti_adj_90pct = pctile(lti_adj), p(90)
egen lti_adj_95pct = pctile(lti_adj), p(95)
egen lti_adj_975pct = pctile(lti_adj), p(97.5)

egen lti_adj_xltv_95pct  = pctile(lti_adj_xltv) , p(95)
egen lti_adj_xpti_95pct  = pctile(lti_adj_xpti) , p(95)

//------Raw share
* 95%
gen lti_95 = 0
replace lti_95 = 1 if LTI>= lti_95pct //4.58  // BASELINE

* 99%
gen lti_99 = 0
replace lti_99 = 1 if LTI>=lti_99pct //5.33

//------Fitted LTI
* 95%
gen lti_fit_95 = 0
replace lti_fit_95 = 1 if lti_fitted>= lti_fit_95pct  // BASELINE

//------Adjusted = Fitted LTI + MedianLTI and MeanHP rebased

* Percentiles
sum lti_adj, detail
* 75%
gen lti_adj_75 = 0
replace lti_adj_75= 1 if lti_adj >= lti_adj_75pct 
* 90%
gen lti_adj_90 = 0
replace lti_adj_90 = 1 if lti_adj >= lti_adj_90pct 
* 95%
gen lti_adj_95 = 0
replace lti_adj_95 = 1 if lti_adj >= lti_adj_95pct // BASELINE
* 97.5%
gen lti_adj_975 = 0
replace lti_adj_975 = 1 if lti_adj >= lti_adj_975pct

*ex LTV
gen lti_adj_ex_ltv = 0
replace lti_adj_ex_ltv = 1 if lti_adj_xltv>= lti_adj_xltv_95pct
*ex PTI
gen lti_adj_ex_pti = 0
replace lti_adj_ex_pti = 1 if lti_adj_xpti>= lti_adj_xpti_95pct

*exLSE
gen lti_adj_95_exl = 0
replace lti_adj_95_exl = 1 if lti_adj>=lti_adj_95pct & reg!=5 & reg!=6
*Income
gen lti_adj_95_poor = 0
replace lti_adj_95_poor = 1 if lti_adj>=lti_adj_95pct & pc_inc<inc_thresh
*LTV u75
gen lti_adj_95_ltv75 = 0
replace lti_adj_95_ltv75 = 1 if lti_adj>=lti_adj_95pct & LTV>=75
*PTI
gen lti_adj_95_pti = 0
replace lti_adj_95_pti = 1 if lti_adj>=lti_adj_95pct & PTI>0.3
*Young
gen lti_adj_95_old = 0
replace lti_adj_95_old = 1 if lti_adj>=lti_adj_95pct & age_borrower>age_thresh

//-------Collapse 
collapse (mean) LTI loan hp age_borrower pc_inc pc_inc_sq interest LTV PTI ///
med_lti_all med_lti_ym iqr_lti_all iqr_lti_ym remortg lti_4u lti_5u lti_95 lti_99 ///
lti_fitted lti_fit_95 ///
lti_adj lti_adj_75 lti_adj_90 lti_adj_95 lti_adj_975 ///
lti_adj_95_exl lti_adj_95_poor lti_adj_95_ltv75 lti_adj_95_pti lti_adj_95_old ///
lti_adj_ex_ltv lti_adj_ex_pti ///
, by(year month)

//--- Used to be merged with macro data in RESTAT_MS24757_Macro_data.dta
save "xxx\Micro_data_collapse.dta", replace

** ENDS
